{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "## 0. 打印设置\n",
    "pd.set_option('display.max_columns', None)\n",
    "# pd.set_option('display.max_rows', None)  ## 显示全部结果，不带省略点\n",
    "# pd.set_option('display.width', 1000)\n",
    "pd.set_option('display.float_format', '{:.0f}'.format)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = pd.read_csv(r\"data/train.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>policy_id</th>\n",
       "      <th>age</th>\n",
       "      <th>customer_months</th>\n",
       "      <th>policy_bind_date</th>\n",
       "      <th>policy_state</th>\n",
       "      <th>policy_csl</th>\n",
       "      <th>policy_deductable</th>\n",
       "      <th>policy_annual_premium</th>\n",
       "      <th>umbrella_limit</th>\n",
       "      <th>insured_zip</th>\n",
       "      <th>insured_sex</th>\n",
       "      <th>insured_education_level</th>\n",
       "      <th>insured_occupation</th>\n",
       "      <th>insured_hobbies</th>\n",
       "      <th>insured_relationship</th>\n",
       "      <th>capital-gains</th>\n",
       "      <th>capital-loss</th>\n",
       "      <th>incident_date</th>\n",
       "      <th>incident_type</th>\n",
       "      <th>collision_type</th>\n",
       "      <th>incident_severity</th>\n",
       "      <th>authorities_contacted</th>\n",
       "      <th>incident_state</th>\n",
       "      <th>incident_city</th>\n",
       "      <th>incident_hour_of_the_day</th>\n",
       "      <th>number_of_vehicles_involved</th>\n",
       "      <th>property_damage</th>\n",
       "      <th>bodily_injuries</th>\n",
       "      <th>witnesses</th>\n",
       "      <th>police_report_available</th>\n",
       "      <th>total_claim_amount</th>\n",
       "      <th>injury_claim</th>\n",
       "      <th>property_claim</th>\n",
       "      <th>vehicle_claim</th>\n",
       "      <th>auto_make</th>\n",
       "      <th>auto_model</th>\n",
       "      <th>auto_year</th>\n",
       "      <th>fraud</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>122576</td>\n",
       "      <td>37</td>\n",
       "      <td>189</td>\n",
       "      <td>2013/8/21</td>\n",
       "      <td>C</td>\n",
       "      <td>500/1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1466</td>\n",
       "      <td>5000000</td>\n",
       "      <td>455456</td>\n",
       "      <td>FEMALE</td>\n",
       "      <td>Masters</td>\n",
       "      <td>protective-serv</td>\n",
       "      <td>reading</td>\n",
       "      <td>not-in-family</td>\n",
       "      <td>62203</td>\n",
       "      <td>0</td>\n",
       "      <td>2014/12/22</td>\n",
       "      <td>Single Vehicle Collision</td>\n",
       "      <td>Side Collision</td>\n",
       "      <td>Total Loss</td>\n",
       "      <td>Ambulance</td>\n",
       "      <td>S5</td>\n",
       "      <td>Riverwood</td>\n",
       "      <td>21</td>\n",
       "      <td>1</td>\n",
       "      <td>?</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>?</td>\n",
       "      <td>54930</td>\n",
       "      <td>6029</td>\n",
       "      <td>5752</td>\n",
       "      <td>44452</td>\n",
       "      <td>Nissan</td>\n",
       "      <td>Maxima</td>\n",
       "      <td>2000</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>937713</td>\n",
       "      <td>44</td>\n",
       "      <td>234</td>\n",
       "      <td>1998/1/4</td>\n",
       "      <td>B</td>\n",
       "      <td>250/500</td>\n",
       "      <td>500</td>\n",
       "      <td>821</td>\n",
       "      <td>0</td>\n",
       "      <td>591805</td>\n",
       "      <td>MALE</td>\n",
       "      <td>JD</td>\n",
       "      <td>craft-repair</td>\n",
       "      <td>polo</td>\n",
       "      <td>other-relative</td>\n",
       "      <td>31606</td>\n",
       "      <td>0</td>\n",
       "      <td>2015/2/18</td>\n",
       "      <td>Multi-vehicle Collision</td>\n",
       "      <td>Side Collision</td>\n",
       "      <td>Minor Damage</td>\n",
       "      <td>Other</td>\n",
       "      <td>S5</td>\n",
       "      <td>Springfield</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>?</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>YES</td>\n",
       "      <td>50680</td>\n",
       "      <td>5376</td>\n",
       "      <td>10156</td>\n",
       "      <td>37347</td>\n",
       "      <td>Honda</td>\n",
       "      <td>Civic</td>\n",
       "      <td>1996</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>680237</td>\n",
       "      <td>33</td>\n",
       "      <td>23</td>\n",
       "      <td>1996/2/6</td>\n",
       "      <td>B</td>\n",
       "      <td>500/1000</td>\n",
       "      <td>1000</td>\n",
       "      <td>1844</td>\n",
       "      <td>0</td>\n",
       "      <td>442490</td>\n",
       "      <td>FEMALE</td>\n",
       "      <td>High School</td>\n",
       "      <td>machine-op-inspct</td>\n",
       "      <td>skydiving</td>\n",
       "      <td>wife</td>\n",
       "      <td>0</td>\n",
       "      <td>-43166</td>\n",
       "      <td>2015/1/18</td>\n",
       "      <td>Single Vehicle Collision</td>\n",
       "      <td>Side Collision</td>\n",
       "      <td>Total Loss</td>\n",
       "      <td>Police</td>\n",
       "      <td>S3</td>\n",
       "      <td>Northbend</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>?</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>NO</td>\n",
       "      <td>47829</td>\n",
       "      <td>4460</td>\n",
       "      <td>9247</td>\n",
       "      <td>33644</td>\n",
       "      <td>Jeep</td>\n",
       "      <td>Wrangler</td>\n",
       "      <td>2002</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>513080</td>\n",
       "      <td>42</td>\n",
       "      <td>210</td>\n",
       "      <td>2008/11/14</td>\n",
       "      <td>A</td>\n",
       "      <td>500/1000</td>\n",
       "      <td>500</td>\n",
       "      <td>1867</td>\n",
       "      <td>0</td>\n",
       "      <td>439408</td>\n",
       "      <td>MALE</td>\n",
       "      <td>JD</td>\n",
       "      <td>transport-moving</td>\n",
       "      <td>video-games</td>\n",
       "      <td>own-child</td>\n",
       "      <td>0</td>\n",
       "      <td>-49440</td>\n",
       "      <td>2015/2/2</td>\n",
       "      <td>Multi-vehicle Collision</td>\n",
       "      <td>Front Collision</td>\n",
       "      <td>Major Damage</td>\n",
       "      <td>Fire</td>\n",
       "      <td>S3</td>\n",
       "      <td>Northbend</td>\n",
       "      <td>20</td>\n",
       "      <td>3</td>\n",
       "      <td>YES</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>YES</td>\n",
       "      <td>68862</td>\n",
       "      <td>11043</td>\n",
       "      <td>5955</td>\n",
       "      <td>53548</td>\n",
       "      <td>Suburu</td>\n",
       "      <td>Legacy</td>\n",
       "      <td>2003</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>192875</td>\n",
       "      <td>29</td>\n",
       "      <td>81</td>\n",
       "      <td>2002/1/8</td>\n",
       "      <td>A</td>\n",
       "      <td>100/300</td>\n",
       "      <td>1000</td>\n",
       "      <td>816</td>\n",
       "      <td>0</td>\n",
       "      <td>640575</td>\n",
       "      <td>FEMALE</td>\n",
       "      <td>MD</td>\n",
       "      <td>craft-repair</td>\n",
       "      <td>video-games</td>\n",
       "      <td>own-child</td>\n",
       "      <td>75296</td>\n",
       "      <td>-73689</td>\n",
       "      <td>2015/2/9</td>\n",
       "      <td>Multi-vehicle Collision</td>\n",
       "      <td>Rear Collision</td>\n",
       "      <td>Total Loss</td>\n",
       "      <td>Fire</td>\n",
       "      <td>S2</td>\n",
       "      <td>Northbend</td>\n",
       "      <td>9</td>\n",
       "      <td>3</td>\n",
       "      <td>YES</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>YES</td>\n",
       "      <td>59726</td>\n",
       "      <td>5617</td>\n",
       "      <td>10301</td>\n",
       "      <td>41550</td>\n",
       "      <td>Ford</td>\n",
       "      <td>F150</td>\n",
       "      <td>2004</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>695</th>\n",
       "      <td>1008425</td>\n",
       "      <td>37</td>\n",
       "      <td>196</td>\n",
       "      <td>1997/6/29</td>\n",
       "      <td>C</td>\n",
       "      <td>250/500</td>\n",
       "      <td>500</td>\n",
       "      <td>1301</td>\n",
       "      <td>0</td>\n",
       "      <td>474615</td>\n",
       "      <td>MALE</td>\n",
       "      <td>JD</td>\n",
       "      <td>tech-support</td>\n",
       "      <td>video-games</td>\n",
       "      <td>wife</td>\n",
       "      <td>47627</td>\n",
       "      <td>0</td>\n",
       "      <td>2015/1/18</td>\n",
       "      <td>Single Vehicle Collision</td>\n",
       "      <td>Front Collision</td>\n",
       "      <td>Major Damage</td>\n",
       "      <td>Ambulance</td>\n",
       "      <td>S5</td>\n",
       "      <td>Columbus</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>?</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>NO</td>\n",
       "      <td>61433</td>\n",
       "      <td>10436</td>\n",
       "      <td>11432</td>\n",
       "      <td>39745</td>\n",
       "      <td>Nissan</td>\n",
       "      <td>Pathfinder</td>\n",
       "      <td>2011</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>696</th>\n",
       "      <td>770702</td>\n",
       "      <td>43</td>\n",
       "      <td>229</td>\n",
       "      <td>2001/5/29</td>\n",
       "      <td>A</td>\n",
       "      <td>250/500</td>\n",
       "      <td>500</td>\n",
       "      <td>1435</td>\n",
       "      <td>8000000</td>\n",
       "      <td>444476</td>\n",
       "      <td>MALE</td>\n",
       "      <td>College</td>\n",
       "      <td>machine-op-inspct</td>\n",
       "      <td>golf</td>\n",
       "      <td>husband</td>\n",
       "      <td>0</td>\n",
       "      <td>-32289</td>\n",
       "      <td>2015/1/13</td>\n",
       "      <td>Multi-vehicle Collision</td>\n",
       "      <td>Rear Collision</td>\n",
       "      <td>Major Damage</td>\n",
       "      <td>Ambulance</td>\n",
       "      <td>S1</td>\n",
       "      <td>Arlington</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>NO</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>?</td>\n",
       "      <td>68623</td>\n",
       "      <td>6798</td>\n",
       "      <td>14557</td>\n",
       "      <td>50606</td>\n",
       "      <td>Volkswagen</td>\n",
       "      <td>Passat</td>\n",
       "      <td>2013</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>697</th>\n",
       "      <td>755099</td>\n",
       "      <td>35</td>\n",
       "      <td>209</td>\n",
       "      <td>2003/1/11</td>\n",
       "      <td>C</td>\n",
       "      <td>100/300</td>\n",
       "      <td>500</td>\n",
       "      <td>1639</td>\n",
       "      <td>0</td>\n",
       "      <td>639608</td>\n",
       "      <td>FEMALE</td>\n",
       "      <td>College</td>\n",
       "      <td>transport-moving</td>\n",
       "      <td>golf</td>\n",
       "      <td>not-in-family</td>\n",
       "      <td>0</td>\n",
       "      <td>-40797</td>\n",
       "      <td>2015/3/5</td>\n",
       "      <td>Multi-vehicle Collision</td>\n",
       "      <td>Rear Collision</td>\n",
       "      <td>Minor Damage</td>\n",
       "      <td>Fire</td>\n",
       "      <td>S2</td>\n",
       "      <td>Riverwood</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>NO</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>YES</td>\n",
       "      <td>58033</td>\n",
       "      <td>9129</td>\n",
       "      <td>4598</td>\n",
       "      <td>40740</td>\n",
       "      <td>Mercedes</td>\n",
       "      <td>C300</td>\n",
       "      <td>2002</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>698</th>\n",
       "      <td>693804</td>\n",
       "      <td>44</td>\n",
       "      <td>275</td>\n",
       "      <td>2003/7/22</td>\n",
       "      <td>B</td>\n",
       "      <td>500/1000</td>\n",
       "      <td>2000</td>\n",
       "      <td>1042</td>\n",
       "      <td>0</td>\n",
       "      <td>432061</td>\n",
       "      <td>FEMALE</td>\n",
       "      <td>Associate</td>\n",
       "      <td>machine-op-inspct</td>\n",
       "      <td>paintball</td>\n",
       "      <td>other-relative</td>\n",
       "      <td>46822</td>\n",
       "      <td>0</td>\n",
       "      <td>2015/1/9</td>\n",
       "      <td>Multi-vehicle Collision</td>\n",
       "      <td>Rear Collision</td>\n",
       "      <td>Major Damage</td>\n",
       "      <td>Ambulance</td>\n",
       "      <td>S5</td>\n",
       "      <td>Northbend</td>\n",
       "      <td>20</td>\n",
       "      <td>3</td>\n",
       "      <td>?</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NO</td>\n",
       "      <td>35253</td>\n",
       "      <td>7359</td>\n",
       "      <td>3464</td>\n",
       "      <td>24677</td>\n",
       "      <td>Audi</td>\n",
       "      <td>A3</td>\n",
       "      <td>2007</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>699</th>\n",
       "      <td>598086</td>\n",
       "      <td>47</td>\n",
       "      <td>263</td>\n",
       "      <td>1996/8/15</td>\n",
       "      <td>C</td>\n",
       "      <td>500/1000</td>\n",
       "      <td>500</td>\n",
       "      <td>1283</td>\n",
       "      <td>0</td>\n",
       "      <td>433809</td>\n",
       "      <td>FEMALE</td>\n",
       "      <td>High School</td>\n",
       "      <td>machine-op-inspct</td>\n",
       "      <td>sleeping</td>\n",
       "      <td>wife</td>\n",
       "      <td>54087</td>\n",
       "      <td>-61343</td>\n",
       "      <td>2015/1/8</td>\n",
       "      <td>Multi-vehicle Collision</td>\n",
       "      <td>Side Collision</td>\n",
       "      <td>Total Loss</td>\n",
       "      <td>Police</td>\n",
       "      <td>S4</td>\n",
       "      <td>Hillsdale</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>?</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NO</td>\n",
       "      <td>24320</td>\n",
       "      <td>2250</td>\n",
       "      <td>4285</td>\n",
       "      <td>18092</td>\n",
       "      <td>Suburu</td>\n",
       "      <td>Forrestor</td>\n",
       "      <td>2008</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>700 rows × 38 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     policy_id  age  customer_months policy_bind_date policy_state policy_csl  \\\n",
       "0       122576   37              189        2013/8/21            C   500/1000   \n",
       "1       937713   44              234         1998/1/4            B    250/500   \n",
       "2       680237   33               23         1996/2/6            B   500/1000   \n",
       "3       513080   42              210       2008/11/14            A   500/1000   \n",
       "4       192875   29               81         2002/1/8            A    100/300   \n",
       "..         ...  ...              ...              ...          ...        ...   \n",
       "695    1008425   37              196        1997/6/29            C    250/500   \n",
       "696     770702   43              229        2001/5/29            A    250/500   \n",
       "697     755099   35              209        2003/1/11            C    100/300   \n",
       "698     693804   44              275        2003/7/22            B   500/1000   \n",
       "699     598086   47              263        1996/8/15            C   500/1000   \n",
       "\n",
       "     policy_deductable  policy_annual_premium  umbrella_limit  insured_zip  \\\n",
       "0                 1000                   1466         5000000       455456   \n",
       "1                  500                    821               0       591805   \n",
       "2                 1000                   1844               0       442490   \n",
       "3                  500                   1867               0       439408   \n",
       "4                 1000                    816               0       640575   \n",
       "..                 ...                    ...             ...          ...   \n",
       "695                500                   1301               0       474615   \n",
       "696                500                   1435         8000000       444476   \n",
       "697                500                   1639               0       639608   \n",
       "698               2000                   1042               0       432061   \n",
       "699                500                   1283               0       433809   \n",
       "\n",
       "    insured_sex insured_education_level insured_occupation insured_hobbies  \\\n",
       "0        FEMALE                 Masters    protective-serv         reading   \n",
       "1          MALE                      JD       craft-repair            polo   \n",
       "2        FEMALE             High School  machine-op-inspct       skydiving   \n",
       "3          MALE                      JD   transport-moving     video-games   \n",
       "4        FEMALE                      MD       craft-repair     video-games   \n",
       "..          ...                     ...                ...             ...   \n",
       "695        MALE                      JD       tech-support     video-games   \n",
       "696        MALE                 College  machine-op-inspct            golf   \n",
       "697      FEMALE                 College   transport-moving            golf   \n",
       "698      FEMALE               Associate  machine-op-inspct       paintball   \n",
       "699      FEMALE             High School  machine-op-inspct        sleeping   \n",
       "\n",
       "    insured_relationship  capital-gains  capital-loss incident_date  \\\n",
       "0          not-in-family          62203             0    2014/12/22   \n",
       "1         other-relative          31606             0     2015/2/18   \n",
       "2                   wife              0        -43166     2015/1/18   \n",
       "3              own-child              0        -49440      2015/2/2   \n",
       "4              own-child          75296        -73689      2015/2/9   \n",
       "..                   ...            ...           ...           ...   \n",
       "695                 wife          47627             0     2015/1/18   \n",
       "696              husband              0        -32289     2015/1/13   \n",
       "697        not-in-family              0        -40797      2015/3/5   \n",
       "698       other-relative          46822             0      2015/1/9   \n",
       "699                 wife          54087        -61343      2015/1/8   \n",
       "\n",
       "                incident_type   collision_type incident_severity  \\\n",
       "0    Single Vehicle Collision   Side Collision        Total Loss   \n",
       "1     Multi-vehicle Collision   Side Collision      Minor Damage   \n",
       "2    Single Vehicle Collision   Side Collision        Total Loss   \n",
       "3     Multi-vehicle Collision  Front Collision      Major Damage   \n",
       "4     Multi-vehicle Collision   Rear Collision        Total Loss   \n",
       "..                        ...              ...               ...   \n",
       "695  Single Vehicle Collision  Front Collision      Major Damage   \n",
       "696   Multi-vehicle Collision   Rear Collision      Major Damage   \n",
       "697   Multi-vehicle Collision   Rear Collision      Minor Damage   \n",
       "698   Multi-vehicle Collision   Rear Collision      Major Damage   \n",
       "699   Multi-vehicle Collision   Side Collision        Total Loss   \n",
       "\n",
       "    authorities_contacted incident_state incident_city  \\\n",
       "0               Ambulance             S5     Riverwood   \n",
       "1                   Other             S5   Springfield   \n",
       "2                  Police             S3     Northbend   \n",
       "3                    Fire             S3     Northbend   \n",
       "4                    Fire             S2     Northbend   \n",
       "..                    ...            ...           ...   \n",
       "695             Ambulance             S5      Columbus   \n",
       "696             Ambulance             S1     Arlington   \n",
       "697                  Fire             S2     Riverwood   \n",
       "698             Ambulance             S5     Northbend   \n",
       "699                Police             S4     Hillsdale   \n",
       "\n",
       "     incident_hour_of_the_day  number_of_vehicles_involved property_damage  \\\n",
       "0                          21                            1               ?   \n",
       "1                           4                            3               ?   \n",
       "2                           0                            1               ?   \n",
       "3                          20                            3             YES   \n",
       "4                           9                            3             YES   \n",
       "..                        ...                          ...             ...   \n",
       "695                         4                            1               ?   \n",
       "696                        17                            3              NO   \n",
       "697                         7                            3              NO   \n",
       "698                        20                            3               ?   \n",
       "699                         5                            3               ?   \n",
       "\n",
       "     bodily_injuries  witnesses police_report_available  total_claim_amount  \\\n",
       "0                  0          3                       ?               54930   \n",
       "1                  2          1                     YES               50680   \n",
       "2                  2          1                      NO               47829   \n",
       "3                  2          2                     YES               68862   \n",
       "4                  2          1                     YES               59726   \n",
       "..               ...        ...                     ...                 ...   \n",
       "695                0          3                      NO               61433   \n",
       "696                0          1                       ?               68623   \n",
       "697                2          0                     YES               58033   \n",
       "698                1          0                      NO               35253   \n",
       "699                0          0                      NO               24320   \n",
       "\n",
       "     injury_claim  property_claim  vehicle_claim   auto_make  auto_model  \\\n",
       "0            6029            5752          44452      Nissan      Maxima   \n",
       "1            5376           10156          37347       Honda       Civic   \n",
       "2            4460            9247          33644        Jeep    Wrangler   \n",
       "3           11043            5955          53548      Suburu      Legacy   \n",
       "4            5617           10301          41550        Ford        F150   \n",
       "..            ...             ...            ...         ...         ...   \n",
       "695         10436           11432          39745      Nissan  Pathfinder   \n",
       "696          6798           14557          50606  Volkswagen      Passat   \n",
       "697          9129            4598          40740    Mercedes        C300   \n",
       "698          7359            3464          24677        Audi          A3   \n",
       "699          2250            4285          18092      Suburu   Forrestor   \n",
       "\n",
       "     auto_year  fraud  \n",
       "0         2000      0  \n",
       "1         1996      0  \n",
       "2         2002      0  \n",
       "3         2003      1  \n",
       "4         2004      0  \n",
       "..         ...    ...  \n",
       "695       2011      1  \n",
       "696       2013      1  \n",
       "697       2002      0  \n",
       "698       2007      1  \n",
       "699       2008      0  \n",
       "\n",
       "[700 rows x 38 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "policy_id                      0\n",
       "age                            0\n",
       "customer_months                0\n",
       "policy_bind_date               0\n",
       "policy_state                   0\n",
       "policy_csl                     0\n",
       "policy_deductable              0\n",
       "policy_annual_premium          0\n",
       "umbrella_limit                 0\n",
       "insured_zip                    0\n",
       "insured_sex                    0\n",
       "insured_education_level        0\n",
       "insured_occupation             0\n",
       "insured_hobbies                0\n",
       "insured_relationship           0\n",
       "capital-gains                  0\n",
       "capital-loss                   0\n",
       "incident_date                  0\n",
       "incident_type                  0\n",
       "collision_type                 0\n",
       "incident_severity              0\n",
       "authorities_contacted          0\n",
       "incident_state                 0\n",
       "incident_city                  0\n",
       "incident_hour_of_the_day       0\n",
       "number_of_vehicles_involved    0\n",
       "property_damage                0\n",
       "bodily_injuries                0\n",
       "witnesses                      0\n",
       "police_report_available        0\n",
       "total_claim_amount             0\n",
       "injury_claim                   0\n",
       "property_claim                 0\n",
       "vehicle_claim                  0\n",
       "auto_make                      0\n",
       "auto_model                     0\n",
       "auto_year                      0\n",
       "fraud                          0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## 空值数量\n",
    "X_train.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>policy_id</th>\n",
       "      <th>age</th>\n",
       "      <th>customer_months</th>\n",
       "      <th>policy_bind_date</th>\n",
       "      <th>policy_state</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>122576</td>\n",
       "      <td>37</td>\n",
       "      <td>189</td>\n",
       "      <td>2013/8/21</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>937713</td>\n",
       "      <td>44</td>\n",
       "      <td>234</td>\n",
       "      <td>1998/1/4</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>680237</td>\n",
       "      <td>33</td>\n",
       "      <td>23</td>\n",
       "      <td>1996/2/6</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>513080</td>\n",
       "      <td>42</td>\n",
       "      <td>210</td>\n",
       "      <td>2008/11/14</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>192875</td>\n",
       "      <td>29</td>\n",
       "      <td>81</td>\n",
       "      <td>2002/1/8</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>695</th>\n",
       "      <td>1008425</td>\n",
       "      <td>37</td>\n",
       "      <td>196</td>\n",
       "      <td>1997/6/29</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>696</th>\n",
       "      <td>770702</td>\n",
       "      <td>43</td>\n",
       "      <td>229</td>\n",
       "      <td>2001/5/29</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>697</th>\n",
       "      <td>755099</td>\n",
       "      <td>35</td>\n",
       "      <td>209</td>\n",
       "      <td>2003/1/11</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>698</th>\n",
       "      <td>693804</td>\n",
       "      <td>44</td>\n",
       "      <td>275</td>\n",
       "      <td>2003/7/22</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>699</th>\n",
       "      <td>598086</td>\n",
       "      <td>47</td>\n",
       "      <td>263</td>\n",
       "      <td>1996/8/15</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>700 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     policy_id  age  customer_months policy_bind_date policy_state\n",
       "0       122576   37              189        2013/8/21            C\n",
       "1       937713   44              234         1998/1/4            B\n",
       "2       680237   33               23         1996/2/6            B\n",
       "3       513080   42              210       2008/11/14            A\n",
       "4       192875   29               81         2002/1/8            A\n",
       "..         ...  ...              ...              ...          ...\n",
       "695    1008425   37              196        1997/6/29            C\n",
       "696     770702   43              229        2001/5/29            A\n",
       "697     755099   35              209        2003/1/11            C\n",
       "698     693804   44              275        2003/7/22            B\n",
       "699     598086   47              263        1996/8/15            C\n",
       "\n",
       "[700 rows x 5 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train[['policy_id','age','customer_months','policy_bind_date','policy_state']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
